############## study 2 #######################
rm(list = ls())


library(ggplot2)
library(tidyverse)
library(reshape)
library(ggrepel)
library(RVAideMemoire)
library(pwr)
library(broom.mixed)
library(huxtable)
library(ggstance)
library(jtools)
library(effectsize)
library(lsr)
library(stargazer)

################### load data ##################
d1 = read.csv("vaccine_motivation_survey.csv")

# check unique values for each variable 
for(col in names(d1)){
  print(unique(d1[, col]))
}

d1[d1 == ""] = NA

# check missing values in the data
colSums(is.na(d1))

################## remove missing data ##########
# remove one missing data in "hisp"
# remove 12 missing data on dependent variables (motivations) 
cols = c("hisp", "vax_protectself", "vax_illness", "vax_econ", "vax_finan")
dta = d1[!rowSums(is.na(d1[cols])), ]

# given we are interested in participants with self-reported partisanship,
# we remove respondent who are "Something else"
dta = dta[dta$party != "Something else", ]

########### in total we have 2606 observations ###########
# recoding demographic factors 
dta$age = as.numeric(factor(dta$age, 
                  levels = c("18 - 24", "25 - 34", "35 - 44", "45 - 54",
                             "55 - 64", "65 - 74", "75 - 84", "85 or older")))
# group people at age 65 or above
dta$age[dta$age >=6 ] = 6


dta$edu = as.numeric(factor(dta$edu, 
                    levels = c("Did not graduate from high school",
                             "High school diploma or the equivalent (GED)",
                             "Some college",
                             "Associate's degree", 
                             "Bachelor's degree",                        
                             "Master's degree",                           
                             "Professional or doctorate degree")))
# group people with High school or lower together, and reorder the edu
dta$edu[dta$edu <= 2] = 2
dta$edu = dta$edu - 1

dta$interest_pol = as.numeric(factor(dta$interest_pol, 
                              levels = c("Not at all interested",
                                         "Not very interested",
                                         "Somewhat interested",
                                         "Very interested", 
                                         "Extremely interested")))

# count the number in each race type (people can choose more than one)
race_type =  c("White","Black or African American", "American Indian or Alaska Native",
        "Asian/Pacific Islander", "Multi-racial", "Other")
for(r in race_type){
  cnt = 0
  for(i in seq(1:dim(dta)[1])){
    if(grepl(r, dta[i, c("race")])){
      cnt = cnt +1
    }
  }
  print(r)
  print(cnt)
}

# recoding race and create new variable composed of 4 types 
dta$new_race = dta$race
dta$new_race[!dta$race %in% c("White",
                              "Black or African American",     
                              "Asian/Pacific Islander") ] = "Multi-racial or Other"
dta$new_race = factor(dta$new_race, 
                      levels = c("White",
                                 "Black or African American",    
                                 "Asian/Pacific Islander", 
                                 "Multi-racial or Other"))
table(dta$new_race)

# recoding the gender variable into male or non-male
dta$new_male = factor(ifelse(dta$gender == "Male", "Male", "non-Male"), 
                      levels = c("non-Male", "Male"))


################### OLS ##########################
# regress each motivation on partisanship, and control other demographic variables
t_econ = lm(vax_econ ~ party  + age + new_male + edu + hisp + new_race, data = dta)
t_ill = lm(vax_illness ~ party  + age + new_male + edu + hisp + new_race, data = dta)
t_finan = lm(vax_finan ~ party  + age + new_male + edu + hisp + new_race, data = dta)
t_self = lm(vax_protectself ~ party  + age + new_male+  edu + hisp + new_race, data = dta)

stargazer(t_econ, t_ill, t_finan, t_self, type = "text", 
          dep.var.labels = c("Getting the economy",
                             "Preventing more illness",
                             "Going back to work",
                             "Protecting myself"),
          covariate.labels = c("Party: Independent",
                               "Party: Republican",
                               "Age", 
                               "Gender: Male",
                               "Education",
                               "Hispanic: Yes",
                               "Black or African American",
                               "Asian/Pacific Islander",
                               "Multi-racial or Other"), out="vax_motivation_models.htm")


# plot the study 2 figure
plot_summs(t_econ, t_ill, t_finan, t_self, scale = TRUE, robust = TRUE, 
           model.names = c("Economy", "Health", "Personal finance", 'Protect self'),  
           legend.title = "DV: Vaccination \n Motivations", colors = c('black', 'gray',  'black', 'grey'),
           coefs = c("Republican \n (compared to Democrat)" = "partyRepublican", 
                     "Independent \n (compared to Democrat)" = "partyIndependent", 
                     "Age (in groups)" = "age", 
                     "Male"="new_male", 
                     "Education level" = "edu", 
                     "Hispanic"="hispYes", 
                     "Black or African American" = "new_raceBlack or African American",
                     "American Indian or Alaska Native"= "new_raceAmerican Indian or Alaska Native",
                     "Asian/Pacific Islander"="new_raceAsian/Pacific Islander",
                     "Multi-racial or Other"="new_raceMulti-racial or Other"))

######### correlation between economic motivator vs financial motivator #########

fe_rep = lm(vax_finan ~ vax_econ + age + new_male + edu + hisp + new_race, 
           data = dta[dta$party == "Republican", ])
fe_dem = lm(vax_finan ~ vax_econ + age + new_male + edu + hisp + new_race, 
           data = dta[dta$party == "Democrat", ])
fe_indp = lm(vax_finan ~ vax_econ + age + new_male + edu + hisp + new_race, 
           data = dta[dta$party == "Independent", ])

stargazer(fe_rep, fe_dem, fe_indp, type='html', out = "fe_model.htm", align = TRUE)

########## permutation test ####################
# run permutation test on each motivator between Republicans and Democratss
perm_econ = perm.t.test(dta$vax_econ[dta$party == "Republican"], 
            dta$vax_econ[dta$party == "Democrat"],)

perm_illness = perm.t.test(dta$vax_illness[dta$party == "Republican"], 
            dta$vax_illness[dta$party == "Democrat"])

perm_finan = perm.t.test(dta$vax_finan[dta$party == "Republican"], 
            dta$vax_finan[dta$party == "Democrat"])

perm_protectself = perm.t.test(dta$vax_protectself[dta$party == "Republican"], 
            dta$vax_protectself[dta$party == "Democrat"])
wilcox.test(immer$Y1, immer$Y2, paired=TRUE) 

# run power 2 sample test on the effect size of each motivation
# get sample size of Republicans, and Democrats
dta %>% group_by(party) %>% tally()

# assuming, alpha = 0.05
pwr.t2n.test(d = perm_econ$estimate[1] - perm_econ$estimate[2], 
             n1 = 558, n2 = 1614, sig.level = 0.01, alternative = "greater")
pwr.t2n.test(d = perm_finan$estimate[1] - perm_finan$estimate[2], 
             n1 = 558, n2 = 1614, sig.level = 0.01, alternative = "greater")
pwr.t2n.test(d = perm_illness$estimate[1] - perm_illness$estimate[2], 
             n1 = 558, n2 = 1614, sig.level = 0.01, alternative = "less")
pwr.t2n.test(d = perm_protectself$estimate[1] - perm_protectself$estimate[2], 
             n1 = 558, n2 = 1614, sig.level = 0.01, alternative = "less")

# effect size: eta sq 
etaSquared(aov(vax_econ ~ party, data = dta))
etaSquared(aov(vax_finan ~ party, data = dta))
etaSquared(aov(vax_illness ~ party, data = dta))
etaSquared(aov(vax_protectself ~ party, data = dta))


# though we ran permuation test instead of t-test, we tried t-test with bonferroni correction 
dta1 = dta[dta$party != "Independent", ]
pairwise.t.test(dta1$vax_econ, dta1$party, p.adj = "bonferroni")
pairwise.t.test(dta1$vax_illness, dta1$party, p.adj = "bonferroni")
pairwise.t.test(dta1$vax_finan, dta1$party, p.adj = "bonferroni")
pairwise.t.test(dta1$vax_protectself, dta1$party, p.adj = "bonferroni")



# vax_status
dta1$new_vax_status = ifelse(dta1$vax_status == "Got first dose of two-dose Pfizer or Moderna COVID-19 vaccine", 
                            "partial_vax", "full_vax")


vax_econ = lm(vax_econ ~ party * new_vax_status  + age + new_male + edu + hisp + new_race, data = dta1)
vax_ill = lm(vax_illness ~ party * new_vax_status  + age + new_male + edu + hisp + new_race, data = dta1)
vax_finan = lm(vax_finan ~ party * new_vax_status  + age + new_male + edu + hisp + new_race, data = dta1)
vax_protect = lm(vax_protectself ~ party * new_vax_status  + age + new_male + edu + hisp + new_race, data = dta1)

stargazer(vax_econ, vax_ill, vax_finan, vax_protect, type = "text",
          dep.var.labels = c("Getting the economy",
                             "Preventing more illness",
                             "Going back to work",
                             "Protecting myself"),
          covariate.labels = c("party: Republican",
                               "Partially vaccinated",
                               "Age",
                               "Gender: Male",
                               "Education",
                               "Hispanic: Yes",
                               "Black or African American",
                               "Asian/Pacific Islander",
                               "Multi-racial or Other",
                               "Republican * partially vaccinated"), out="vax_status_models.htm")




